################################################################
### REPLICATION SCRIPT: State Climate Policy Determinants
### Generates key figures and regression tables
### Original: policy_plotting_12-16-2019.R
################################################################

rm(list = ls())

# Set working directory to the replication folder root
# UPDATE THIS PATH if running on a different machine
setwd(file.path(dirname(dirname(rstudioapi::getActiveDocumentContext()$path))))
# Alternatively, set manually:
# setwd("/path/to/replication")

# ---- Packages ----
library(readxl)
library(dplyr)
library(ggplot2)
library(plm)
library(lme4)
library(arm)
library(lmtest)
library(sandwich)
library(stargazer)

# ---- Load Data ----
data.aceee   <- as.data.frame(read_xlsx("data/policy/aceee/cleaned_aceee.xlsx"))
data.rps     <- as.data.frame(read_xlsx("data/policy/rps/rps_data_cleaned.xlsx"))
data.ftg     <- as.data.frame(read_xlsx("data/policy/free_the_grid/cleaned_ftg.xlsx"))
data.sev     <- as.data.frame(read_xlsx("data/policy/severance/cleaned_sev.xlsx"))
data.pricing <- as.data.frame(read_xlsx("data/policy/pricing/cleaned_pricing.xlsx"))
data.gen     <- as.data.frame(read_xlsx("data/policy/rps/cleaned_state_gen.xlsx"))
data.adapt   <- as.data.frame(read_xlsx("data/policy/rps/adaptation.xlsx"))
data.gas     <- as.data.frame(read_xlsx("data/policy/rps/gas_tax.xlsx"))
data.covs    <- as.data.frame(read_xlsx("data/policy/covariates.xlsx"))
pty.control  <- as.data.frame(read_xlsx("data/party_control/cleaned_pty_control.xlsx"))
pty.control  <- subset(pty.control, select = -state)

# ---- Merge All Data ----
all <- merge(pty.control, data.aceee, by = c("year", "abb"), all.x = TRUE)
all <- merge(all, data.rps, by = c("year", "abb"), all.x = TRUE)
all <- merge(all, data.ftg, by = c("year", "abb"), all.x = TRUE)
all <- merge(all, data.sev, by = c("year", "abb"), all.x = TRUE)
all <- merge(all, data.pricing, by = c("year", "abb"), all.x = TRUE)
all <- merge(all, data.gen, by = c("year", "abb"), all.x = TRUE)
all <- merge(all, data.adapt, by = c("year", "abb"), all.x = TRUE)
all <- merge(all, data.gas, by = c("year", "state"), all.x = TRUE)
all <- merge(all, data.covs, by = c("year", "abb"), all.x = TRUE)

# Restrict to post-2000
all <- subset(all, year >= 2000)

# ---- Variable Construction ----
# Interest groups
all$ff_group  <- all$totutility + all$totoil_gas
all$alt_group <- all$totenv + all$totrenew

# Government control numeric
all$tot_control_num <- ifelse(all$tot_control == "rep", 1,
                        ifelse(all$tot_control == "split", 2, 3))

# Rename and scale policy measures
all$rps     <- all$rps_stringency_pos
all$rps2    <- all$rps_stringency
all$aceee   <- all$score
all$ftg.nem <- all$nem
all$ftg.int <- all$inter
all$sev     <- 100 * all$rate_n
all$pricing <- all$capped
all$nem     <- all$nem_perc
all$emit    <- log(all$mm_tons)

# Composite DG score
all$ftg <- (all$ftg.int + all$ftg.nem) / 2

# Unified government indicators
all$unif_dem <- ifelse(all$gov_control == "dem" & all$leg_control == "dem", 1, 0)
all$unif_rep <- ifelse(all$gov_control == "rep" & all$leg_control == "rep", 1, 0)
all$split    <- ifelse(all$unif_rep == 0 & all$unif_dem == 0, 1, 0)

# Clean labels for plots
all$tot_control_clean <- ifelse(all$tot_control == "dem", "Democrat",
                          ifelse(all$tot_control == "rep", "Republican", "Split"))

# GDP per capita
all$gdp_per            <- 1000000 * all$gdp_all / all$pop
all$gdp_extraction_per <- 1000000 * all$gdp_extraction / all$pop

# Log generation variables
all$wind_log  <- log(all$wind_gen + 1)
all$solar_log <- log(all$solar_gen + 1)
all$coal_log  <- log(all$coal_gen + 1)
all$nem_log   <- log(all$nem_mw + 1)

# Rescale
all$extraction_perc_early <- 100 * all$extraction_perc_early
all$pres_avg  <- 100 * all$pres_avg
all$tot_tax2  <- all$tot_tax / 100000
all$pricing2  <- all$pricing * 100
all$nem.t     <- all$nem * 100
all$extraction <- all$gdp_extraction_per

# Emissions per capita and density
all$tons_cap <- 1000000 * all$mm_tons / all$pop
all$density  <- all$pop / all$sq_miles

# Time-variant presidential vote
all$pres_var <- ifelse(all$year <= 2002, all$pres_dem_2000,
                 ifelse(all$year %in% 2003:2006, all$pres_dem_2004,
                   ifelse(all$year %in% 2007:2010, all$pres_dem_2008,
                     ifelse(all$year %in% 2011:2014, all$pres_dem_2012,
                       all$pres_dem_2016))))
all$pres_dem_policy <- (all$pres_dem_2008 + all$pres_dem_2012) / 2

# Partisan shift
all$partisan_shift  <- all$dempid_est - all$reppid_est
all$partisan_shift2 <- 100 * all$partisan_shift

# ---- Lead Variables ----
all <- all[order(all$abb, all$year), ]
all <- subset(all, select = -state.y)

all <- all %>%
  group_by(abb) %>%
  mutate(rps_lead  = dplyr::lead(rps, 1),
         rps2_lead = dplyr::lead(rps2, 1),
         ftg_lead  = dplyr::lead(ftg, 1),
         aceee_lead = dplyr::lead(aceee, 1),
         electric_savings_lead = dplyr::lead(electric_savings, 1),
         gas_spend_lead = dplyr::lead(gas_spend, 1),
         sev_lead  = dplyr::lead(sev, 1),
         adapt_plan_lead  = dplyr::lead(adapt_plan, 1),
         adapt_plan2_lead = dplyr::lead(adapt_plan2, 1),
         gas_tax_lead = dplyr::lead(gas_tax, 1)) %>%
  ungroup()

# Normalize
all$gdp_per_norm               <- scale(all$gdp_per)[, 1]
all$wind_potential_norm         <- scale(all$wind_potential_adj)[, 1]
all$solar_potential_norm        <- scale(all$solar_potential_adj)[, 1]
all$extraction_perc_early_norm  <- scale(all$extraction_perc_early)[, 1]
all$attitudes_norm              <- scale(all$attitudes)[, 1]
all$pres_avg_norm               <- scale(all$pres_avg)[, 1]

# Energy growth
all <- all %>%
  group_by(abb) %>%
  mutate(wind_diff  = wind_perc[year == 2017] - wind_perc[year == 2000],
         solar_diff = solar_perc[year == 2017] - solar_perc[year == 2000],
         coal_diff  = coal_perc[year == 2017] - coal_perc[year == 2000],
         nem_diff   = nem.t[year == 2017] - nem.t[year == 2011],
         emit_diff  = emit[year == 2016] - emit[year == 2000])

# Pre-period means
all <- all %>%
  group_by(abb) %>%
  mutate(unemp06            = mean(unemp[year <= 2006 & year >= 2000], na.rm = TRUE),
         medianaqi06        = medianaqi[year == 2006],
         extraction06       = gdp_extraction_per[year == 2006],
         electricity_price06 = mean(electricity_price[year <= 2006 & year >= 2000], na.rm = TRUE),
         natgas_price06     = mean(natgas_price[year <= 2006 & year >= 2000], na.rm = TRUE),
         gasoline_price06   = mean(gasoline_price[year <= 2006 & year >= 2000], na.rm = TRUE),
         oilprod06          = oil_1000bbl[year == 2006],
         gasprod06          = gas_mmcf[year == 2006])

# Presidential vote grouping
all$pres2 <- ifelse(all$pres == "rep", "All Republican",
               ifelse(all$pres == "dem", "All Democrat", "Mixed"))


################################################################
### APPENDIX FIGURES: Policy Trends by Presidential Voting
### (Figs A1-A4 in published paper)
################################################################

pres.data <- all %>%
  group_by(year, pres2) %>%
  mutate(mean_rps      = mean(rps, na.rm = TRUE),
         mean_aceee    = mean(aceee, na.rm = TRUE),
         mean_electric_spend = mean(electric_spend, na.rm = TRUE),
         mean_ftg      = mean(ftg, na.rm = TRUE),
         mean_sev      = mean(sev, na.rm = TRUE),
         mean_wind     = mean(wind_perc, na.rm = TRUE),
         mean_solar    = mean(solar_perc, na.rm = TRUE),
         mean_coal     = mean(coal_perc, na.rm = TRUE),
         mean_natgas   = mean(natgas_perc, na.rm = TRUE),
         mean_emit     = mean(emit),
         mean_tons_cap = mean(tons_cap, na.rm = TRUE),
         mean_ext      = mean(gdp_extraction_per, na.rm = TRUE))

# Shared theme for appendix figures
pres_colors <- c("All Democrat" = "blue", "All Republican" = "red", "Mixed" = "purple")
theme_appendix <- theme_bw() +
  theme(axis.text.x = element_text(size = 10, angle = 45, hjust = 1, vjust = 1),
        legend.position = "bottom", legend.title = element_blank())

# -- Fig A1: RPS policy strength trends (2000-2014) --
fig_a1 <- ggplot(pres.data[!is.na(pres.data$rps) & pres.data$year %in% 2000:2014, ], aes(x = year)) +
  geom_line(aes(group = abb, y = rps), color = "grey", linewidth = 0.3) +
  geom_line(aes(color = pres2, y = mean_rps), linewidth = 2) +
  geom_text(data = subset(pres.data, year == 2014), aes(label = abb, y = rps),
            size = 3, position = position_jitter(), check_overlap = TRUE) +
  theme_appendix +
  scale_color_manual(values = pres_colors) +
  labs(x = "", y = "RPS Stringency")
ggsave(fig_a1, file = "figures/fig_a1_rps.png", width = 8, height = 6)

# -- Fig A2: DG policy strength trends (2007-2018) --
fig_a2 <- ggplot(pres.data[!is.na(pres.data$ftg) & pres.data$year %in% 2007:2018, ], aes(x = year)) +
  geom_line(aes(group = abb, y = ftg), color = "grey", linewidth = 0.3) +
  geom_line(aes(color = pres2, y = mean_ftg), linewidth = 2) +
  geom_text(data = subset(pres.data, year == 2018 & !is.na(ftg)), aes(label = abb, y = ftg),
            size = 3, position = position_jitter(), check_overlap = TRUE) +
  theme_appendix +
  scale_color_manual(values = pres_colors) +
  labs(x = "", y = "Freeing the Grid DG Policy Score (1-5)") +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 5))
ggsave(fig_a2, file = "figures/fig_a2_dg.png", width = 8, height = 6)

# -- Fig A3: Energy efficiency policy strength trends (2007-2017) --
fig_a3 <- ggplot(pres.data[!is.na(pres.data$aceee) & pres.data$year %in% 2007:2017, ], aes(x = year)) +
  geom_line(aes(group = abb, y = aceee), color = "grey", linewidth = 0.3) +
  geom_line(aes(color = pres2, y = mean_aceee), linewidth = 2) +
  geom_text(data = subset(pres.data, year == 2017), aes(label = abb, y = aceee),
            size = 3, position = position_jitter(), check_overlap = TRUE) +
  theme_appendix +
  scale_color_manual(values = pres_colors) +
  labs(x = "", y = "ACEEE Score of Energy Efficiency Policy (0-50)") +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 5))
ggsave(fig_a3, file = "figures/fig_a3_aceee.png", width = 8, height = 6)

# -- Fig A4: Severance tax policy strength trends (2004-2013) --
fig_a4 <- ggplot(pres.data[!is.na(pres.data$sev) & pres.data$year %in% 2004:2013, ], aes(x = year)) +
  geom_line(aes(group = abb, y = sev), color = "grey", linewidth = 0.3) +
  geom_line(aes(color = pres2, y = mean_sev), linewidth = 2) +
  geom_text(data = subset(pres.data, year == 2013), aes(label = abb, y = sev),
            size = 3, position = position_jitter(), check_overlap = TRUE) +
  theme_appendix +
  scale_color_manual(values = pres_colors) +
  labs(x = "", y = "Mean Oil and Gas Severance Tax") +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 5))
ggsave(fig_a4, file = "figures/fig_a4_sev.png", width = 8, height = 6)


################################################################
### TABLE 1: Summary Statistics
################################################################

t.data <- subset(all, year <= 2013 & year >= 2006)
t.data <- as.data.frame(t.data)

# Scaling for regression sample
t.data$gdp_per     <- .001 * t.data$gdp_per
t.data$tot_tax2    <- .001 * t.data$tot_tax2
t.data$density2    <- .001 * t.data$density
t.data$tot_tax_pop <- t.data$tot_tax / t.data$pop
t.data$extraction06 <- .001 * t.data$extraction06
t.data$log_pop     <- log(t.data$pop)
t.data$pres_var    <- t.data$pres_var * 100
t.data$pres_dem_2012 <- t.data$pres_dem_2012 * 100
t.data$time        <- t.data$year - 2005
t.data$gasprod06_adjusted <- .00001 * t.data$gasprod06
t.data$oilprod06_adjusted <- .00001 * t.data$oilprod06

# Government control continuous
t.data$leg_control2 <- ifelse(t.data$leg_control == "dem", 1,
                        ifelse(t.data$leg_control == "split", .5, 0))
t.data$gov_control2 <- ifelse(t.data$gov_control == "dem", 1,
                        ifelse(t.data$gov_control == "split", .5, 0))

# Summary statistics
sums <- data.frame(subset(t.data, select = c("rps_lead", "ftg_lead", "aceee_lead", "sev_lead",
                                              "unif_dem", "unif_rep", "partisan_shift2",
                                              "wind_potential_adj", "solar_potential_adj",
                                              "gdp_per", "tot_tax_pop",
                                              "medianaqi06", "unemp06", "unemp", "electricity_price06",
                                              "electricity_price", "gasoline_price", "natgas_price",
                                              "prop_tax", "gasprod06", "oilprod06")))

names.sum <- c("RPS strength", "Free the Grid score", "ACEEE score", "Severance Tax",
               "Unified Democratic", "Unified Republican", "Partisanship (Dem - Rep %)",
               "Wind potential", "Solar potential", "GDP per capita (thousands)", "Tax revenue per capita (thousands)",
               "Air quality (pre-period)", "Unemployment rate (pre-period)", "Unemployment rate",
               "Electricity price (pre-period)", "Electricity price", "Gasoline price", "Natural gas price",
               "Property tax exemption", "Natural gas production (MMcF, pre-period)",
               "Oil production (1000s barrels, pre-period)")

cat("\n\n===== TABLE 1: Summary Statistics =====\n")
stargazer(sums, summary = TRUE, type = "text",
          title = "Summary Statistics",
          digits = 2,
          covariate.labels = names.sum)


################################################################
### TABLE 2: RPS Regression Models
################################################################

# Change variables for models
t.data <- t.data %>%
  group_by(abb) %>%
  mutate(rps_lead_ch = rps_lead - rps_lead[year == 2006]) %>%
  ungroup()

# Column 1: FE, political variables only
rps.mlm.fe <- plm(rps_lead ~ unif_dem + unif_rep + partisan_shift2,
                   data = t.data, effect = "twoways", model = "within",
                   index = c("abb", "year"))

# Column 2: FE, with economic controls
rps.mlm.fe2 <- plm(rps_lead ~ unif_dem + unif_rep + partisan_shift2
                    + gdp_per + tot_tax_pop,
                    data = t.data, effect = "twoways", model = "within",
                    index = c("abb", "year"))

# Column 3: Multilevel model with state-level covariates
rps.mlm4 <- lmer(rps_lead ~ unif_dem + unif_rep + partisan_shift2
                  + gdp_per + tot_tax_pop
                  + wind_potential_adj + solar_potential_adj
                  + medianaqi06 + unemp06 + electricity_price06
                  + (1 | abb) + (1 | year),
                  data = t.data)

order_rps <- c("unif_dem", "unif_rep", "partisan_shift2", "gdp_per", "tot_tax_pop",
               "wind_potential_adj", "solar_potential_adj",
               "medianaqi06", "unemp06", "electricity_price06")
names_rps <- c("Unified Democratic", "Unified Republican", "Partisanship",
               "GDP per capita", "Tax revenue (per capita)",
               "Wind potential", "Solar potential",
               "Air Quality Index (pre-period)", "Unemployment (pre-period)", "Electricity price (pre-period)")

cat("\n\n===== TABLE 2: Predictors of RPS Strength =====\n")
stargazer(rps.mlm.fe, rps.mlm.fe2, rps.mlm4,
          no.space = TRUE, title = "Predictors of RPS Strength", type = "text",
          digits = 2, covariate.labels = names_rps,
          dep.var.caption = "RPS Strength", dep.var.labels.include = FALSE,
          order = order_rps, keep.stat = "n", omit = c("time", "Constant"))


################################################################
### TABLE 3: DG (Free the Grid) Regression Models
################################################################

t.data <- t.data %>%
  group_by(abb) %>%
  mutate(ftg_lead_ch = ftg_lead - ftg_lead[year == 2006]) %>%
  ungroup()

ftg.mlm.fe <- plm(ftg_lead ~ unif_dem + unif_rep + partisan_shift2,
                   data = t.data, effect = "twoways", model = "within",
                   index = c("abb", "year"))

ftg.mlm.fe2 <- plm(ftg_lead ~ unif_dem + unif_rep + partisan_shift2
                    + gdp_per + tot_tax_pop,
                    data = t.data, effect = "twoways", model = "within",
                    index = c("abb", "year"))

ftg.mlm4 <- lmer(ftg_lead ~ unif_dem + unif_rep + partisan_shift2
                  + gdp_per + tot_tax_pop
                  + solar_potential_adj
                  + medianaqi06 + unemp06 + electricity_price06
                  + (1 | abb) + (1 | year),
                  data = t.data)

order_ftg <- c("unif_dem", "unif_rep", "partisan_shift2", "gdp_per", "tot_tax_pop",
               "solar_potential_adj", "medianaqi06", "unemp06", "electricity_price06")
names_ftg <- c("Unified Democratic", "Unified Republican", "Partisanship",
               "GDP per capita", "Tax revenue (per capita)",
               "Solar potential",
               "Air Quality Index (pre-period)", "Unemployment (pre-period)", "Electricity price (pre-period)")

cat("\n\n===== TABLE 3: Predictors of DG (Free the Grid) Score =====\n")
stargazer(ftg.mlm.fe, ftg.mlm.fe2, ftg.mlm4,
          no.space = TRUE, title = "Predictors of DG Policy", type = "text",
          digits = 2, covariate.labels = names_ftg,
          dep.var.caption = "Free the Grid Score", dep.var.labels.include = FALSE,
          order = order_ftg, keep.stat = "n", omit = c("time", "Constant"))


################################################################
### TABLE 4: Energy Efficiency (ACEEE) Regression Models
################################################################

t.data <- t.data %>%
  group_by(abb) %>%
  mutate(aceee_lead_ch = aceee_lead - aceee_lead[year == 2006]) %>%
  ungroup()

aceee.mlm.fe <- plm(aceee_lead ~ unif_dem + unif_rep + partisan_shift2,
                     data = t.data, effect = "twoways", model = "within",
                     index = c("abb", "year"))

aceee.mlm.fe2 <- plm(aceee_lead ~ unif_dem + unif_rep + partisan_shift2
                      + gdp_per + tot_tax_pop,
                      data = t.data, effect = "twoways", model = "within",
                      index = c("abb", "year"))

aceee.mlm3.new <- lmer(aceee_lead ~ unif_dem + unif_rep + partisan_shift2
                        + gdp_per + tot_tax_pop
                        + electricity_price06 + natgas_price06 + gasoline_price06
                        + medianaqi06 + unemp06
                        + (1 | abb) + (1 | year),
                        data = t.data)

order_aceee <- c("unif_dem", "unif_rep", "partisan_shift2", "gdp_per", "tot_tax_pop",
                 "electricity_price06", "natgas_price06", "gasoline_price06", "medianaqi06", "unemp06")
names_aceee <- c("Unified Democratic", "Unified Republican", "Partisanship",
                 "GDP per capita", "Tax revenue (per capita)",
                 "Electricity price (pre-period)", "Natural gas price (pre-period)",
                 "Gasoline price (pre-period)", "Air Quality Index (pre-period)", "Unemployment (pre-period)")

cat("\n\n===== TABLE 4: Predictors of ACEEE Score =====\n")
stargazer(aceee.mlm.fe, aceee.mlm.fe2, aceee.mlm3.new,
          no.space = TRUE, title = "Predictors of ACEEE Energy Efficiency Score", type = "text",
          digits = 2, covariate.labels = names_aceee,
          dep.var.caption = "ACEEE Score", dep.var.labels.include = FALSE,
          order = order_aceee, keep.stat = "n", omit = c("time", "Constant"))


################################################################
### TABLE 5: Severance Tax Regression Models
################################################################

sev.mlm.fe <- plm(sev_lead ~ unif_dem + unif_rep + partisan_shift2,
                   data = t.data, effect = "twoways", model = "within",
                   index = c("abb", "year"))

sev.mlm.fe2 <- plm(sev_lead ~ unif_dem + unif_rep + partisan_shift2
                    + gdp_per + tot_tax_pop,
                    data = t.data, effect = "twoways", model = "within",
                    index = c("abb", "year"))

sev.mlm4 <- lmer(sev_lead ~ unif_dem + unif_rep + partisan_shift2
                  + gdp_per + tot_tax_pop
                  + prop_tax + gasprod06_adjusted + oilprod06_adjusted + unemp
                  + (1 | abb) + (1 | year),
                  data = t.data)

order_sev <- c("unif_dem", "unif_rep", "partisan_shift2", "gdp_per", "tot_tax_pop",
               "prop_tax", "gasprod06_adjusted", "oilprod06_adjusted", "unemp")
names_sev <- c("Unified Democratic", "Unified Republican", "Partisanship",
               "GDP per capita", "Tax revenue (per capita)",
               "Property Tax Exemption", "Gas Production (pre-period)",
               "Oil Production (pre-period)", "Unemployment")

cat("\n\n===== TABLE 5: Predictors of Severance Tax =====\n")
stargazer(sev.mlm.fe, sev.mlm.fe2, sev.mlm4,
          no.space = TRUE, title = "Predictors of Severance Tax", type = "text",
          digits = 2, covariate.labels = names_sev,
          dep.var.caption = "Tax on Oil and Gas Extraction", dep.var.labels.include = FALSE,
          order = order_sev, keep.stat = "n", omit = c("time", "Constant"))


################################################################
### Done
################################################################
cat("\n\nReplication complete. Figures saved to figures/ directory.\n")
cat("Regression tables printed above (text format).\n")
cat("To generate LaTeX tables, change type='text' to type='latex' in stargazer() calls.\n")
